# Notebook setup: extend sys.path so the project-local packages resolve,
# import them, load the saved project and build a fresh configuration.
import tensorflow as tf
import os
import sys
import collections

# The parent directory holds the project-local packages imported below.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Build the path component by component: the original literal
# "strands_qsr_lib\qsr_lib\src3" depended on invalid escape sequences
# ("\q", "\s") and only worked with the Windows path separator.
a = os.path.join(module_path, "strands_qsr_lib", "qsr_lib", "src3")
sys.path.append(a)

# These imports must stay below the sys.path changes above.
from rl import action_learner
from rl import value_estimator
from progress_learner import EventProgressEstimator
import config
import project
# Need to add this import to load class
from project import Project
from importlib import reload

# To load this, I have to add pickle.load(f, encoding='latin-1')
# Otherwise it would give the bug
# https://stackoverflow.com/questions/21129020/how-to-fix-unicodedecodeerror-ascii-codec-cant-decode-byte
p = project.Project.load("../slidearound_hopstep_1.proj")

# Re-import config so edits made on disk are picked up, then instantiate it.
reload(config)
c = config.Config()

from rl import block_movement_env
reload(action_learner)
reload(value_estimator)

# Bare expression kept from the notebook: it only displays a value when it
# is the last statement of a cell and has no effect in a plain script.
p.speed

import matplotlib
from matplotlib import pyplot as plt

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a .py
# file; this is the equivalent call, skipped when not running under IPython.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# `plotting` is used at the end of this section but was never imported as a
# module; the file imports it elsewhere only via `from plotting import ...`.
import plotting

# --- Manual check: restore the progress model and step an environment ---
reload(block_movement_env)
tf.reset_default_graph()
sess = tf.Session()

with tf.variable_scope("model") as scope:
    print('-------- Load progress model ---------')
    progress_estimator = EventProgressEstimator(
        is_training=False, name=p.name, config=c)
    saver = tf.train.Saver()

saver.restore(sess, '../progress.mod')

env = block_movement_env.BlockMovementEnv(
    c, p.speed, name='SlideAround',
    progress_estimator=progress_estimator, session=sess)
env.default()
env.render()

# Apply four hand-picked actions, rendering after each one.
for demo_action in (
    (0, [0.1, -0.7, 0.5]),
    (0, [0.2, -0.2, 0.5]),
    (0, [-0.17, 0.08, 0.5]),
    (0, [-0.3, 0, 0.5]),
):
    env.step(demo_action)
    env.render()

env.replay()

# --- Training run: rebuild the graph and learn a policy ---
reload(config)
c = config.Config()
c.num_episodes = 500
c.constraint_sigma = 0

reload(block_movement_env)
reload(value_estimator)
reload(action_learner)

# Close the first session before discarding its graph: TensorFlow documents
# resetting the default graph while a session is active as undefined
# behaviour, and the original never released this session.
sess.close()
tf.reset_default_graph()

global_step = tf.Variable(0, name="global_step", trainable=False)

sess = tf.Session()

policy_est = value_estimator.PolicyEstimator(c)
value_est = value_estimator.ValueEstimator(c)

sess.run(tf.global_variables_initializer())

with tf.variable_scope("model") as scope:
    print('-------- Load progress model ---------')
    progress_estimator = EventProgressEstimator(
        is_training=False, name=p.name, config=c)

# Print out all variables that would be restored
for variable in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model'):
    print(variable.name)

# Restore only the variables under the "model" scope from the checkpoint.
saver = tf.train.Saver(
    tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model'))
saver.restore(sess, '../progress.mod')

action_ln = action_learner.ActionLearner(
    c, p, progress_estimator, policy_est, value_est, session=sess)
action_policy = action_learner.random_action

past_envs, stats = action_ln.policy_learn(
    action_policy, breadth=1, verbose=False,
    choice='ACTOR_CRITIC', default=True)

# 499 equals c.num_episodes - 1 for the value configured above.
past_envs[499].env.replay()
past_envs[60].env.replay()

# The plot call appears twice in the original notebook; both are kept.
plotting.plot_episode_stats(stats, smoothing_window=5)
plotting.plot_episode_stats(stats, smoothing_window=5)
# Plot the current statistics, then reload statistics pickled to
# 'session.dat' and plot those as well.
# `plotting` must be bound as a module before its first use below; the
# original only had `from plotting import EpisodeStats`, which does not
# bind the module name.
import pickle

import plotting
from plotting import EpisodeStats

plotting.plot_episode_stats(stats, smoothing_window=5)
print(plotting.__file__)

# encoding='latin-1' is passed on load; an earlier comment in this file
# links it to a UnicodeDecodeError seen without it.
with open('session.dat', 'rb') as f:
    stats = pickle.load(f, encoding='latin-1')

plotting.plot_episode_stats(stats, smoothing_window=5)
# Load the statistics pickled to 'session.500.dat' and plot them.
# `plotting` is imported here because no earlier statement in the file
# binds it as a module name.
import plotting

with open('session.500.dat', 'rb') as f:
    stats = pickle.load(f, encoding='latin-1')

plotting.plot_episode_stats(stats, smoothing_window=5)

import glob
# Plot every saved actor-critic session, then run a few Matplotlib/NumPy
# scratch cells.
# `np` and `plotting` are used below but were never imported as names.
import numpy as np

import plotting

# This looks kind of good
# ..\session_data_actor_critic\session.data._0.0020_0.9500_0.0020_0.9700
# Paths are built with os.path.join: the original literals contained
# invalid escape sequences ("\s", "\*") and only worked on Windows.
for fn in glob.glob(os.path.join('..', 'session_data_actor_critic', '*')):
    print(fn)
    with open(fn, 'rb') as f:
        stats = pickle.load(f, encoding='latin-1')
    plotting.plot_episode_stats(stats, smoothing_window=5)

with open(os.path.join('..', 'abc',
                       'session.data._0.0020_0.9500_0.0020_0.9600'),
          'rb') as f:
    stats = pickle.load(f, encoding='latin-1')

plotting.plot_episode_stats(stats, smoothing_window=5)

import matplotlib

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a .py
# file; this is the equivalent call, skipped when not running under IPython.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

matplotlib.rcParams['xtick.direction'] = 'out'
matplotlib.rcParams['ytick.direction'] = 'out'

delta = 0.025
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)
# NOTE(review): mlab.bivariate_normal is deprecated/removed in newer
# Matplotlib releases -- confirm the pinned Matplotlib version still has it.
Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)

plt.figure()
CS = plt.contour(X, Y, Z2, 3)
plt.clabel(CS, inline=1, fontsize=10)
plt.title('Simplest default with labels')

# Bare expression kept from the notebook (cell output); no effect in a script.
Z1

import matplotlib.pyplot as plt

# Scatter 100 samples from a 2-D normal with covariance [[1, 0], [0, 100]].
mean = [0, 0]
cov = [[1, 0], [0, 100]]
x, y = np.random.multivariate_normal(mean, cov, 100).T
plt.plot(x, y, 'x')
plt.axis('equal')
plt.show()

a = np.array([2, 3])
# Bare expression kept from the notebook (cell output); no effect in a script.
a ** 2